from vllm import LLM, SamplingParams

# Define the list of input prompts and the sampling parameters
prompts = [
    "你好，我的名字是",
    "美国总统是",
    "法国的首都是",
    "人工智能的未来是",
]
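# Local path to the model weights; adjust to your own environment
# (the commented-out 0.5B variant below is kept as an alternative)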
# model_path = '/gemini/pretrain/Qwen2.5-0.5B-Instruct'
model_path = '/gemini/pretrain2/Qwen2.5-3B-Instruct'

# Initialize the vLLM engine; this loads the model weights and may take a while
print('========== Loading model... ==========')
llm = LLM(model=model_path)
print('========== Model loaded OK ==========')

# Call generate() to run inference on all prompts in a single batch
print('Starting inference...')
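# Sampling parameters: temperature=0.8 adds randomness, top_p=0.95 applies nucleus sampling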
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
outputs = llm.generate(prompts, sampling_params)
print('Inference done!')
# Print the results; each RequestOutput can hold several completions, we print the first one
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")

